/*
PROGRAM BY: Teresa Fort
Last edited 04/23/2020

This program makes a firm-country-hs10 level dataset that distinguishes imports by type

It flags imports as inputs or produced goods using industry or firm-level information;


*/ 

%include 'yyyy/pdata.sas';

options obs=max;

libname ecroot 'xxxx/data';
%let input=xxxx/input ;

%let lbdvers=c201601 ;
%let output=xxxx/data ;





*1. Bring in the Trade data;
*************************************;
data imports;
    set pdata.imp_comb2007_&lbdvers ;
    hs6=substr(hs,1,6);
    if related="Y" then rp_value=value ;
    *want to keep consumption imports only;
    if type=1 | type=2 | type=5;
    if firmid~=" " ;
run;
*************************************;
  
 



*2. Aggregate to yearly level (easy to change to monthly level);
*************************************;
proc sort data=imports; by firmid hs country; run;

proc means sum noprint data=imports;
   by firmid hs country;
   var value rp_value;
   output out=imports02(drop=_type_ _freq_)  sum()= ;
run; 
*************************************;

  
  proc export data = imports02
    file = "&output/temp_imports.dta"
    dbms=stata replace;
run;

*3. Firm-by-industry-hs-country (repeat import values by naics);
*************************************;
*a) Make a firmid-naics dataset;
************;
data firm_naics01;
   set ecroot.estabs2007_clean(keep=firmid naics07_census emp_census sales rename=(naics07_census=naics_cmf) );
run;

proc sort data=firm_naics01; by firmid naics_cmf; run;

proc means sum noprint data=firm_naics01;
   by firmid naics_cmf;
   var emp_census sales;
   output out=firm_naics02(drop=_freq_ _type_) sum()=;
run;
***********;

*b) Joinby to naics-hs bridges; 
***********;
 *i) input codes;
 proc sql ;
  create table firm_naics_hs_in as 
  select A.firmid, A.naics_cmf, B.*
  from firm_naics02 A left join ecroot.naics_out_hs_in B 
    on A.naics_cmf=B.naics_cmf;
 quit;
 
 *ii) output codes;
  proc sql ;
  create table firm_naics_hs_prod as 
  select A.firmid, A.naics_cmf, B.*
  from firm_naics02 A left join ecroot.naics_out_hs_prod B 
    on A.naics_cmf=B.naics_cmf;
 quit;
***********;

*c) Collapse to firmid-hs level and rename variables for merge;
*************************************;
 %macro ctype(t); 
 proc sort data=firm_naics_hs_&t; by firmid hs; run;
 proc means sum noprint data=firm_naics_hs_&t;
    by firmid hs;
    var mlevel: ;
    output out=firm_naics_hs_&t._02(drop=_type_) sum()=;
 run;

 data firm_naics_hs_&t._03(drop=mlevel:) ;
    set firm_naics_hs_&t._02(drop=_freq_);
    %macro lev();
      %do i=3 %to 6 ;
      naics_&t._l&i=mlevel_&i ;
      label naics_&t._l&i="Merge level of &t code to HS" ;
      %end;
      %mend;
      %lev;
 run;   
 %mend;
 %ctype(in);
 %ctype(prod);
*************************************;


*4. Firm-hs-country dataset merged by firmid-hs for inuts and prods;
*************************************;
proc sort data=ecroot.firm_hs_inputs out=firm_hs_inputs; by firmid hs ; run;
proc sort data=ecroot.firm_hs_prods out=firm_hs_prods; by firmid hs ; run;

data firm_flags;
   merge  imports02		(in=a) 
	  firm_hs_inputs    	(in=b) 
	  firm_hs_prods		(in=c rename=(mlevel_6=plevel_6 mlevel_5=plevel_5 mlevel_4=plevel_4 mlevel_3=plevel_3))
	  firm_naics_hs_in_03   (in=d)
	  firm_naics_hs_prod_03 (in=e)
	  ;
   by firmid hs ;
   if b=1 then f_input=1;
   if c=1 then f_prod=1;
   if d=1 then n_input=1;
   if e=1 then n_prod=1;
   if a=1 ;
run;
*************************************;

*5. Add manuf, short form, and mat-prod data indicators;
*************************************;
proc sort nodupkey data=ecroot.cmf_mat_match2007 out=in_mat  (keep=firmid); by firmid; run;
proc sort nodupkey data=ecroot.cmf_prod_match2007 out=in_prod(keep=firmid); by firmid; run;

proc sort data=ecroot.estabs2007_clean out=cmf(keep=firmid shortform ar); by firmid; run;

data cmf2;
  set cmf;
  if shortform=1 then short=1;
  if shortform=0 then short=0;
run;  

proc means sum noprint data=cmf2; 
   by firmid;
   var short ar; 
output out=cmf3(drop=_freq_ _type_ ) sum()= ;
run;

data firm_flags2 ;
  merge firm_flags(in=a) cmf3(in=b) in_mat(in=c) in_prod(in=d);
  by firmid;
  if b=1 then in_cmf=1;
  if c=1 then in_mat=1;
  if d=1 then in_prod=1;
  if a=1;
run;

*************************************;

*6. Output final dataset;
*************************************;
proc sort data=firm_flags2; by firmid hs country; run;

data ecroot.firm_imports_type;
   set firm_flags2;
run;

proc export data = ecroot.firm_imports_type
    file = "&output/firm_imports_type_2007.dta"
    dbms=stata replace;
run;
*************************************;

/*
*7.  Delete intermediate SAS datasets that were made in the script;
*************************************;
proc datasets library=ecroot;
  save firm_imports_type;
run;
*/




